import os
import re
from urllib.parse import urljoin, urlparse

import redis
import requests
from bs4 import BeautifulSoup


# Local directory that downloaded pictures are written to.
save_folder = r'D:\Downloads\kongjiewang'

# Site root that image paths found in picture pages are resolved against.
domain_name = 'http://www.kongjie.com/'

# Album listing without ordering/paging parameters (not referenced by the
# functions in this file).
url = 'http://www.kongjie.com/home.php?mod=space&do=album&view=all'

# First page of the hot-ordered album listing; the crawl starts here.
start_url = 'http://www.kongjie.com/home.php?mod=space&do=album&view=all&order=hot&page=1'

# Captures the user id (group 1) and picture id (group 2) from a URL that
# carries ``uid=`` followed later by ``picid=``.
uid_picid_pattern = re.compile(r'.*?uid=(\d+).*?picid=(\d+).*?')

# Connection to the local Redis server; the functions below keep a hash
# named 'kongjie' there with one "uid:picid" field per processed picture.
redis_con = redis.Redis(
    host='127.0.0.1',
    port=6379,
    db=0,
)

# Not referenced by the functions in this file.
count = 10
i = 0

def parse_album_url(url):
    """Crawl the album listing starting at *url* and process every album on it.

    Each listing page is fetched, the link of every entry matching
    ``div.ptw li`` is passed to ``save_image_in_album``, and the ``a.nxt``
    pagination link is followed until a page has no such link.

    Args:
        url: URL of the first listing page to crawl.
    """
    visited_pages = set()
    # Iterate instead of recursing: one stack frame per listing page would
    # eventually exceed Python's recursion limit on a long listing. The
    # visited set stops the crawl if the pagination ever links back.
    while url and url not in visited_pages:
        visited_pages.add(url)
        response = requests.get(url)
        soup = BeautifulSoup(response.text, 'html.parser')

        for album in soup.select('div.ptw li'):
            # Skip list items that lack the expected <div><a href=...> layout
            # instead of crashing on them.
            link = album.div.a if album.div is not None else None
            if link is None or not link.get('href'):
                continue
            # urljoin leaves absolute links untouched and resolves relative
            # ones against the page they were found on.
            save_image_in_album(urljoin(url, link['href']))

        next_page = soup.select_one('a.nxt')
        if next_page is None or not next_page.get('href'):
            break
        url = urljoin(url, next_page['href'])

def save_image_in_album(album_url):
    """Save the pictures of an album by walking its "next picture" links.

    Starting at *album_url*, each picture page is fetched, the picture inside
    ``div#photo_pic.c`` is downloaded with ``save_img`` unless the Redis hash
    ``kongjie`` already has a ``uid:picid`` field for it, and the link titled
    "下一张" is followed. The walk stops when a URL carries no uid/picid,
    when there is no next link, or when the next picture has already been
    recorded in Redis or visited during this call.

    Args:
        album_url: URL of a picture page; must contain ``uid=`` and
            ``picid=`` query values, otherwise nothing is done.
    """
    visited_keys = set()
    # Loop instead of recursing per picture (the original recursive call also
    # referenced a misspelled, undefined function name).
    while True:
        match = uid_picid_pattern.search(album_url)
        if not match:
            return
        uid, picid = match.groups()
        record_key = uid + ':' + picid
        # Remember pages seen in this call so pages without a downloadable
        # image (never written to Redis) cannot make the walk cycle forever.
        visited_keys.add(record_key)

        response = requests.get(album_url)
        soup = BeautifulSoup(response.text, 'html.parser')

        img_div = soup.find('div', id='photo_pic', class_='c')
        img_tag = None
        if img_div is not None and img_div.a is not None:
            img_tag = img_div.a.img
        if (img_tag is not None and img_tag.get('src')
                and not redis_con.hexists('kongjie', record_key)):
            img_src = urljoin(domain_name, img_tag['src'])
            save_img(img_src, uid, picid)
            # NOTE: save_img reports failures by printing only, so the
            # picture is recorded here even when the download failed.
            redis_con.hset('kongjie', record_key, '1')

        next_image = soup.select_one('div.pns.mlnv.vm.mtm.cl a.btn[title="下一张"]')
        if next_image is None or not next_image.get('href'):
            return
        next_image_url = urljoin(album_url, next_image['href'])
        next_match = uid_picid_pattern.search(next_image_url)
        if not next_match:
            return
        next_key = next_match.group(1) + ':' + next_match.group(2)
        if next_key in visited_keys or redis_con.hexists('kongjie', next_key):
            return
        album_url = next_image_url

def save_img(image_url, uid, picid):
    """Download *image_url* into the global ``save_folder``.

    The file is named ``<uid>_<picid><ext>``, where ``<ext>`` is the
    extension found in the path component of *image_url*.

    Failures (network errors, HTTP error statuses, file-system errors) are
    reported with ``print`` and not raised.

    Args:
        image_url: Absolute URL of the image to download.
        uid: User id, used as the first part of the file name.
        picid: Picture id, used as the second part of the file name.
    """
    try:
        response = requests.get(image_url)
        # Do not write an HTTP error page to disk as if it were an image.
        response.raise_for_status()
        # Use only the URL path so a query string or fragment never ends up
        # in the file extension.
        file_name_ext = os.path.splitext(urlparse(image_url).path)[1]
        file_name = uid + '_' + picid + file_name_ext
        # Create the target folder on first use instead of failing every save.
        os.makedirs(save_folder, exist_ok=True)
        with open(os.path.join(save_folder, file_name), 'wb') as fw:
            fw.write(response.content)
    except IOError as e:
        # requests' exceptions derive from IOError, so this covers network
        # and HTTP failures as well as file-system errors.
        print('save error！', e, image_url)
    else:
        print(file_name, 'image saved!', image_url)


# Start crawling at the first page of the hot-ordered album listing. There is
# no __main__ guard, so this runs whenever the module is executed or imported.
parse_album_url(start_url)


